mutate(pcto_all=vote_all/reg_all,
pcto=turnout/reg_all,
pcto.elig=turnout/cvap,
yr1992=year==1992,
yr1996=year==1996,
yr2000=year==2000,
yr2004=year==2004,
yr2008=year==2008,
yr2012=year==2012,
yr2016=year==2016,
yr2020=year==2020,
cases_mn_centered=cases_mn - mean(cases_mn, na.rm=T),
deaths_mn_centered=deaths_mn - mean(deaths_mn, na.rm=T),
cases_sum_centered=cases_sum - mean(cases_sum, na.rm=T),
deaths_sum_centered=deaths_sum - mean(deaths_sum, na.rm=T),
avr=ifelse(statenm %in% c("Georgia","Oregon") & year<2016, 0, avr),
avr=ifelse(statenm %in% "Delaware" & year<=2008, 0, avr),
yrcount=as.integer(as.factor(year))-1) %>%
mutate(pcto=ifelse(is.infinite(pcto), NA, pcto),
pcto.elig=ifelse(is.infinite(pcto.elig), NA, pcto.elig)) %>%
mutate(pcto.elig=ifelse(pcto.elig>1, 1, pcto.elig))
#vermont votes where missing in 2018
vt_extra <- read_csv("vt_2018_votes.csv") %>%
mutate(stfips=str_pad(stfips, side="left", width=2, pad="0"),
cntyfips=str_pad(cntyfips, side="left", width=3, pad="0"),
geoid=str_pad(geoid, side="left", width=5, pad="0"))
switch_vbm_all <- left_join(switch_vbm_all,
rbind.fill(d, vt_extra) %>% dplyr::select(cntynm, stfips:year, statenm, vote_all)) %>%
mutate(pcvbm=vbm/vote_all,
first_yr_vbm_all=year + 4 - year%%4) %>%
dplyr::select(-vote_all, -year)
#create lag and lead variables for event studies#
d <- left_join(d, switch_vbm_all) %>%
left_join(switch_vbm_app) %>%
left_join(switch_vbm_noex) %>%
left_join(switch_vbm_perm) %>%
left_join(switch_avr) %>%
left_join(switch_edr) %>%
left_join(switch_early) %>%
mutate(pcvbm=ifelse(vbm_all==1, pcvbm, 0),
cycles_to_vbm_all=ifelse(is.na(first_yr_vbm_all), -1, (year-first_yr_vbm_all)/4),
cycles_to_vbm_app=ifelse(is.na(first_yr_vbm_app), -1, (year-first_yr_vbm_app)/4),
cycles_to_vbm_noex=ifelse(is.na(first_yr_vbm_noex), -1, (year-first_yr_vbm_noex)/4),
cycles_to_vbm_perm=ifelse(is.na(first_yr_vbm_perm), -1, (year-first_yr_vbm_perm)/4),
cycles_to_avr=ifelse(is.na(first_yr_avr), -1, (year-first_yr_avr)/4),
cycles_to_edr=ifelse(is.na(first_yr_edr), -1, (year-first_yr_edr)/4),
cycles_to_early=ifelse(is.na(first_yr_early), -1, (year-first_yr_early)/4)) %>%
mutate(vbm_perm=as.integer(cycles_to_vbm_perm>=0),
vbm_all_m3=cycles_to_vbm_all<=-3,
vbm_all_m2=cycles_to_vbm_all==-2,
vbm_all_m1=cycles_to_vbm_all==-1,
vbm_all_p0=cycles_to_vbm_all==0,
vbm_all_p1=cycles_to_vbm_all==1,
vbm_all_p2=cycles_to_vbm_all>=2,
vbm_noex_m3=cycles_to_vbm_noex<=-3,
vbm_noex_m2=cycles_to_vbm_noex==-2,
vbm_noex_m1=cycles_to_vbm_noex==-1,
vbm_noex_p0=cycles_to_vbm_noex==0,
vbm_noex_p1=cycles_to_vbm_noex==1,
vbm_noex_p2=cycles_to_vbm_noex>=2,
vbm_perm_m3=cycles_to_vbm_perm<=-3,
vbm_perm_m2=cycles_to_vbm_perm==-2,
vbm_perm_m1=cycles_to_vbm_perm==-1,
vbm_perm_p0=cycles_to_vbm_perm==0,
vbm_perm_p1=cycles_to_vbm_perm==1,
vbm_perm_p2=cycles_to_vbm_perm>=2,
vbm_app_m3=cycles_to_vbm_app<=-3,
vbm_app_m2=cycles_to_vbm_app==-2,
vbm_app_m1=cycles_to_vbm_app==-1,
vbm_app_p0=cycles_to_vbm_app>=0,
avr_m3=cycles_to_avr<=-3,
avr_m2=cycles_to_avr==-2,
avr_m1=cycles_to_avr==-1,
avr_p0=cycles_to_avr==0,
avr_p1=cycles_to_avr>=1,
edr_m3=cycles_to_edr<=-3,
edr_m2=cycles_to_edr==-2,
edr_m1=cycles_to_edr==-1,
edr_p0=cycles_to_edr==0,
edr_p1=cycles_to_edr==1,
edr_p2=cycles_to_edr>=2,
early_m3=cycles_to_early<=-3,
early_m2=cycles_to_early==-2,
early_m1=cycles_to_early==-1,
early_p0=cycles_to_early==0,
early_p1=cycles_to_early==1,
early_p2=cycles_to_early>=2) %>%
mutate(vbm_perm=ifelse(vbm_all==1, 1, vbm_perm)) %>%
filter(cntynm!="Overseas")
library(foreign)
library(haven)
library(tidyverse)
library(dplyr)
library(plyr)
library(readxl)
library(parallel)
library(plm)
library(multiwayvcov)
library(lmtest)
library(car)
library(PanelMatch)
library(estimatr)
#############
##FUNCTIONS##
#############
interact.sum <- function(model, vars) {
coef <- sum(summary(model)$coefficients[vars,1])
variance <- sum(diag(vcovHC(model, type="HC1")[vars,vars])) +
2*vcovHC(model, type="HC1")[vars,vars][2,1]
se <- sqrt(variance)
output <- data.frame(coef=coef, se=se)
rownames(output) <- paste(vars, collapse="+")
return(output)
}
rmse_calculator <- function(i, type) {
file <- paste0(type,"_model_paper", i, ".rds")
m <- readRDS(file)[[1]]
rmse <- sqrt(sum(m$residuals^2)/m$df.residual)
n <- pdim(m)$nT$N
return(data.frame(model=i, rmse=rmse, n=n))
}
lags <- function(data, var) {
n_lags <- length(data[,var])-1
if(n_lags>0) {
for(i in 1:n_lags) {
data[,paste0(var,"_l",i)] <- c(rep(NA,i), data[1:(length(data[,var])-i),var])
}
}
return(data)
}
getmode <- function(v) {
uniqv <- unique(v)
uniqv[which.max(tabulate(match(v, uniqv)))]
}
########################
##DATA LOAD AND FORMAT##
########################
#Set working directory to local data folder#
workingDirectory <- "/Users/ericmcghee/Dropbox/PPIC/UCF/Post-election Covid/academic paper/data"
setwd(workingDirectory)
##EAC reports for vbm counts##
e08 <- read_excel("2008 eavs xls august 11 2010/County_Excel/Combined_SectionC.xls") %>%
dplyr::select(STATE_, JurisName, JurisID, C1b, C2) %>%
dplyr::rename(stpost=STATE_, cntynm=JurisName, geoid=JurisID, vbm=C1b) %>%
mutate(geoid=str_sub(geoid, 1, 5), year=2008) %>%
group_by(year, stpost, geoid) %>%
dplyr::summarize(vbm=sum(vbm, na.rm=T)) %>%
filter(!stpost %in% c("AS","GU","PR","VI"), !is.na(vbm), vbm>0)
e12 <- read.dbf(unzip("DBF Files 2012.zip", "DBF Files/Section C_C1-C4.dbf"), as.is=T) %>%
dplyr::select(STATE, JURISDICTI, FIPSCODE, QC1B) %>%
dplyr::rename(stpost=STATE, cntynm=JURISDICTI, geoid=FIPSCODE, vbm=QC1B) %>%
mutate(geoid=str_sub(geoid, 1, 5), year=2012, stpost=ifelse(stpost=="Oh", "OH", stpost)) %>%
group_by(year, stpost, geoid) %>%
dplyr::summarize(vbm=sum(vbm, na.rm=T)) %>%
filter(!(stpost %in% c("AS","GU","PR","VI")))
e14 <- read.dbf(unzip("2014_EAVS_DBF_Files1.zip", "EAVS_Section_C.dbf"), as.is=T) %>%
dplyr::select(STATE, JURISDICTI, FIPSCODE, QC1B) %>%
dplyr::rename(stpost=STATE, cntynm=JURISDICTI, geoid=FIPSCODE, vbm=QC1B) %>%
mutate(geoid=str_sub(geoid, 1, 5), year=2014) %>%
group_by(year, stpost, geoid) %>%
dplyr::summarize(vbm=sum(vbm, na.rm=T)) %>%
filter(!(stpost %in% c("AS","GU","PR","VI")))
e16 <- read_csv(unzip("EAVS_2016_Final_Data_for_Public_Release_v2.csv.zip", "EAVS 2016 Final Data for Public Release v.3.csv")) %>%
dplyr::select(State, JurisdictionName, FIPSCode, C1b) %>%
dplyr::rename(stpost=State, cntynm=JurisdictionName, geoid=FIPSCode, vbm=C1b) %>%
mutate(geoid=as.character(format(geoid, scientific=F))) %>%
mutate(geoid=str_trim(geoid)) %>%
mutate(geoid=str_pad(str_sub(as.character(geoid), 1, -6), width=5, side="left", pad="0"),
vbm=parse_number(vbm), year=2016) %>%
group_by(year, stpost, geoid) %>%
dplyr::summarize(vbm=sum(vbm, na.rm=T)) %>%
filter(!(stpost %in% c("AS","GU","PR","VI","WI")))
e18 <- read_csv("EAVS_2018_for_Public_Release_Updates3.csv") %>%
dplyr::select(State_Abbr, Jurisdiction_Name, FIPSCode, C1b) %>%
dplyr::rename(stpost=State_Abbr, cntynm=Jurisdiction_Name, geoid=FIPSCode, vbm=C1b) %>%
mutate(geoid=str_sub(geoid, 1, 5), vbm=parse_number(vbm), year=2018) %>%
group_by(year, stpost, geoid) %>%
dplyr::summarize(vbm=sum(vbm, na.rm=T)) %>%
filter(!(stpost %in% c("AS","GU","PR","VI","WI")))
#combining eavs data for each year#
#wi_vbm.csv contains vbm data for WI which is missing from the EAVS files#
e <- rbind.fill(e08, e12, e14, e16, e18, read_csv("wi_vbm.csv") %>% dplyr::select(-cntynm))
##historical policies##
p <- read_csv("context_data_1992_2020_latest.csv")[,-1] %>%
mutate(vbm_all=ifelse(stpost=="CA" & cntynm=="Alpine" & year>=1996, 1, vbm_all),
vbm_all=ifelse(stpost=="CA" & cntynm=="Plumas" & year>=2016, 1, vbm_all),
vbm_noex=ifelse(stpost=="KS" & year==2020, 1, vbm_noex),
stfips=str_pad(stfips, side="left", width=2, pad="0"),
cntyfips=str_pad(cntyfips, side="left", width=3, pad="0"),
geoid=str_pad(geoid, side="left", width=5, pad="0")) %>%
filter(!(statenm=="Minnesota" & year==1992)) #no info on which counties experimented with no excuse VBM that year
#election day registration plus additions based on
#https://www.ncsl.org/research/elections-and-campaigns/same-day-registration.aspx
#and media searches to identify when first went into effect
edr <- read_dta("edr_onlinereg_1978_2018.dta") %>%
rbind.fill(filter(., year==2018) %>% mutate(year=2020)) %>%
mutate(edr2=ifelse(statenm=="Colorado" & year>2012, 1, edr2),
edr2=ifelse(statenm=="Connecticut" & year>2012, 1, edr2),
edr2=ifelse(statenm=="District of Columbia" & year>2010, 1, edr2),
edr2=ifelse(statenm=="Hawaii" & year>2016, 1, edr2),
edr2=ifelse(statenm=="Illinois" & year>2014, 1, edr2),
edr2=ifelse(statenm=="Maryland" & year>2018, 1, edr2),
edr2=ifelse(statenm=="Michigan" & year>2018, 1, edr2),
edr2=ifelse(statenm=="Nevada" & year>2019, 1, edr2),
edr2=ifelse(statenm=="Utah" & year>2016, 1, edr2),
edr2=ifelse(statenm=="Vermont" & year>2017, 1, edr2),
edr2=ifelse(statenm=="Washington" & year>2018, 1, edr2),
stfips=str_pad(as.character(fipsst), side="left", width=2, pad="0")) %>%
filter(statenm!="United States") %>%
dplyr::select(year, statenm, stfips, edr2) %>%
left_join(read_csv("onreg_implementation.csv")) %>%
mutate(first_onreg_year=ifelse(is.na(first_onreg_year), 2050, first_onreg_year),
onreg=as.integer(year>=first_onreg_year),
edr=edr2) %>%
dplyr::select(-edr2, -first_onreg_year)
p <- left_join(p, edr) %>%
mutate(onreg=ifelse(statenm=="Missouri" & year>=2014 & cntynm=="Boone", 1, onreg),
cntynm=str_replace(cntynm, " city", "")) %>%
mutate(cntynm=str_replace(cntynm, " Parish", ""))
#CVAP with eligibility imputed#
c <- read_csv("vap_cvap_1990_2020_imputed.csv")[,-1] %>%
mutate(cvap=ifelse(is.na(cvap), exp(cvap_log_hat), cvap))
c.ak <- filter(c, stfips %in% "02") %>%
group_by(year, stfips) %>%
dplyr::summarize(totpop=sum(totpop, na.rm=T), vap=sum(vap, na.rm=T), cvap=sum(cvap, na.rm=T)) %>%
mutate(statenm="Alaska",
geoid="00002",
stpost="AK")
c <- rbind.fill(c, c.ak)
##identify earliest dates of reforms in each state and county##
#universal vote-by-mail (plus vbm % prior to switch)#
switch_vbm_all <- filter(p, vbm_all==1) %>%
group_by(geoid) %>%
filter(year==min(year)) %>%
dplyr::select(stfips:stpost) %>%
mutate(year=year-4) %>%
mutate(year=ifelse(year==2016 & stpost=="VT", 2018, year)) %>% #VT 2016 VBM data are missing#
#identify vbm percentage for last election before the switch#
left_join(read_csv("WA-County-VotesCast.csv") %>% mutate(stpost="WA"), by=c("stpost","cntynm"="County","year"="Year")) %>%
dplyr::select(stfips:stpost,AbsenteeCast) %>%
dplyr::rename(vbm=AbsenteeCast) %>%
left_join(read_csv("or_reg_1996.csv") %>%
mutate(stfips=as.character(stfips),
cntyfips=str_pad(cntyfips, side="left", width=3, pad="0"),
geoid=paste0(stfips, cntyfips)) %>%
dplyr::select(year:cntynm, vbm_returned, stfips)) %>%
mutate(vbm=ifelse(!is.na(vbm_returned), vbm_returned, vbm)) %>%
dplyr::select(-vbm_returned) %>%
left_join(read_csv("ca_last_vbm.csv"), by=c("stpost","cntynm","year"), suffix=c("",".ca")) %>%
mutate(vbm=ifelse(!is.na(vbm.ca), vbm.ca, vbm)) %>%
dplyr::select(-vbm.ca) %>%
left_join(e, by=c("year","stpost","geoid"), suffix=c("",".eavs")) %>%
mutate(vbm=ifelse(!is.na(vbm.eavs), vbm.eavs, vbm)) %>%
dplyr::select(-vbm.eavs) %>%
filter(!is.na(vbm))
#no excuse vote-by-mail#
switch_vbm_noex <- filter(p, vbm_noex==1) %>%
group_by(geoid) %>%
filter(year==min(year)) %>%
dplyr::select(stfips:stpost) %>%
dplyr::rename(first_yr_vbm_noex=year)
#universal vote-by-mail applications#
switch_vbm_app <- filter(p, vbm_app==1) %>%
group_by(geoid) %>%
filter(year==min(year)) %>%
dplyr::select(stfips:stpost) %>%
dplyr::rename(first_yr_vbm_app=year)
#permanent vote-by-mail list#
switch_vbm_perm <- read_csv("switch_vbm_perm.csv")
#automatic voter registration#
switch_avr <- filter(p, avr==1) %>%
group_by(geoid) %>%
filter(year==min(year)) %>%
dplyr::select(stfips:stpost) %>%
dplyr::rename(first_yr_avr=year)
#election day registration#
switch_edr <- filter(p, edr==1) %>%
group_by(geoid) %>%
filter(year==min(year)) %>%
dplyr::select(stfips:stpost) %>%
dplyr::rename(first_yr_edr=year)
#early voting#
switch_early <- filter(p, early==1) %>%
group_by(geoid) %>%
filter(year==min(year)) %>%
dplyr::select(stfips:stpost) %>%
dplyr::rename(first_yr_early=year)
##David Leip Atlas registration and turnout data##
#AK is treated as an entire state because it has no permanent counties#
d92 <- read_excel("TurnoutData1992.xlsx", sheet="County VTO")
d92 <- d92[,c(1:7,55:57)]
names(d92) <- c("cntynm","stpost","pop","vap","reg","ballots","turnout","stfips","cntyfips","geoid")
d92 <- filter(d92, !is.na(cntynm), stpost!="AK") %>%
mutate(stpost=ifelse(cntynm=="Alaska", "AK", stpost),
stfips=str_pad(stfips, side="left", width=2, pad="0"),
cntyfips=str_pad(cntyfips, side="left", width=3, pad="0"),
geoid=str_pad(geoid, side="left", width=5, pad="0"),
year=1992) %>%
filter(stpost!="T")
d96 <- read_excel("TurnoutData1996.xlsx", sheet="County VTO")
d96 <- d96[,c(1:7,56:58)]
names(d96) <- c("cntynm","stpost","pop","vap","reg","ballots","turnout","stfips","cntyfips","geoid")
d96 <- filter(d96, !is.na(cntynm), stpost!="AK") %>%
mutate(stpost=ifelse(cntynm=="Alaska", "AK", stpost),
stfips=str_pad(stfips, side="left", width=2, pad="0"),
cntyfips=str_pad(cntyfips, side="left", width=3, pad="0"),
geoid=str_pad(geoid, side="left", width=5, pad="0"),
year=1996) %>%
filter(stpost!="T")
d00 <- read_excel("TurnoutData2000.xlsx", sheet="County VTO")
d00 <- d00[,c(1:8,57:59)]
names(d00) <- c("cntynm","stpost","pop","vap","cvap","reg","ballots","turnout","stfips","cntyfips","geoid")
d00 <- filter(d00, !is.na(cntynm), stpost!="AK") %>%
mutate(stpost=ifelse(cntynm=="Alaska", "AK", stpost),
geoid=ifelse(cntynm=="Alaska", "00002", geoid),
year=2000) %>%
filter(stpost!="T")
d04 <- read_excel("TurnoutData2004.xlsx", sheet="County VTO")
d04 <- d04[,c(1:8,48:50)]
names(d04) <- c("cntynm","stpost","pop","vap","cvap","reg","ballots","turnout","stfips","cntyfips","geoid")
d04 <- filter(d04, !is.na(cntynm), stpost!="AK") %>%
mutate(stpost=ifelse(cntynm=="Alaska", "AK", stpost),
stfips=str_pad(stfips, side="left", width=2, pad="0"),
cntyfips=str_pad(cntyfips, side="left", width=3, pad="0"),
geoid=str_pad(geoid, side="left", width=5, pad="0"),
year=2004) %>%
filter(stpost!="T")
d08 <- read_excel("TurnoutData2008.xlsx", sheet="County VTO")
d08 <- d08[,c(1:9,44:46,51,59:61)]
names(d08) <- c("cntynm","stpost","pop","vap","cvap","reg","actreg","ballots","turnout","stfips","cntyfips","geoid",
"edr","in_person_votes","early_votes","absentee")
d08 <- filter(d08, !is.na(cntynm), stpost!="AK") %>%
mutate(stpost=ifelse(cntynm=="Alaska", "AK", stpost),
stfips=str_pad(stfips, side="left", width=2, pad="0"),
cntyfips=str_pad(cntyfips, side="left", width=3, pad="0"),
geoid=str_pad(geoid, side="left", width=5, pad="0"),
stpost=ifelse(cntynm=="Alaska", "AK", stpost),
absentee=parse_number(absentee),
year=2008) %>%
filter(stpost!="T")
d12 <- read_excel("TurnoutData2012.xlsx", sheet="County VTO")
d12 <- d12[,c(1:9,49:51,56,63:65)]
names(d12) <- c("cntynm","stpost","pop","vap","cvap","reg","actreg","ballots","turnout","stfips","cntyfips","geoid",
"edr","in_person_votes","early_votes","absentee")
d12 <- filter(d12, !is.na(cntynm), stpost!="AK") %>%
mutate(stpost=ifelse(cntynm=="Alaska", "AK", stpost),
stfips=str_pad(stfips, side="left", width=2, pad="0"),
cntyfips=str_pad(cntyfips, side="left", width=3, pad="0"),
geoid=str_pad(geoid, side="left", width=5, pad="0"),
year=2012) %>%
filter(stpost!="T")
d16 <- read_excel("TurnoutData2016.xlsx", sheet="County VTO")
d16 <- d16[,c(1:9,50:52,57,64:66,68)]
names(d16) <- c("cntynm","stpost","pop","vap","cvap","reg","actreg","ballots","turnout","stfips","cntyfips","geoid",
"edr","in_person_votes","early_votes","absentee","mail")
d16 <- filter(d16, !is.na(cntynm), stpost!="AK") %>%
mutate(stpost=ifelse(cntynm=="Alaska", "AK", stpost),
stfips=str_pad(stfips, side="left", width=2, pad="0"),
cntyfips=str_pad(cntyfips, side="left", width=3, pad="0"),
geoid=str_pad(geoid, side="left", width=5, pad="0"),
year=2016) %>%
filter(stpost!="T")
d20 <- read_excel("TurnoutData2020.xlsx", sheet="County VTO")
d20 <- d20[,c(1:9,50:52,57,64:66,68)]
names(d20) <- c("cntynm","stpost","pop","vap","cvap","reg","actreg","ballots","turnout","stfips","cntyfips","geoid",
"edr","in_person_votes","early_votes","absentee","mail")
d20 <- filter(d20, !is.na(cntynm), stpost!="AK") %>%
mutate(stpost=ifelse(cntynm=="Alaska", "AK", stpost),
stfips=str_pad(stfips, side="left", width=2, pad="0"),
cntyfips=str_pad(cntyfips, side="left", width=3, pad="0"),
geoid=str_pad(geoid, side="left", width=5, pad="0"),
year=2020) %>%
filter(stpost!="T")
#fill in missing registration with data from SoS websites#
extra.reg <- read_csv("misc_missing_reg.csv") %>%
mutate(stfips=str_pad(stfips, side="left", width=2, pad="0"),
cntyfips=str_pad(cntyfips, side="left", width=3, pad="0"),
geoid=str_pad(geoid, side="left", width=3, pad="0"))
extra.reg.fl <- read_csv("eavs_by_county_2016.csv") %>%
filter(State=="FL") %>%
dplyr::select(FIPSCode, State, JurisdictionName, A3a, A3b) %>%
dplyr::rename(stpost=State, cntynm=JurisdictionName, geoid=FIPSCode, actreg=A3a, inactreg=A3b) %>%
mutate(geoid=str_sub(geoid, 1, 5), actreg=parse_number(actreg), inactreg=parse_number(inactreg),
reg=actreg+inactreg, year=2016) %>%
dplyr::select(-inactreg)
extra.reg <- rbind.fill(extra.reg, extra.reg.fl)
#combine the files#
d <- rbind.fill(d92,d96,d00,d04,d08,d12,d16,d20) %>%
dplyr::select(-c(vap, cvap)) %>%
left_join(dplyr::select(c, -c(cntynm, stpost, stfips, cntyfips)), by=c("year","geoid")) %>%
left_join(dplyr::select(extra.reg, -c(stpost, statenm, cntynm, stfips, cntyfips)),
by=c("year","geoid"), suffix=c("",".xtra")) %>%
mutate(reg=ifelse(!is.na(reg.xtra), reg.xtra, reg),
actreg=ifelse(!is.na(actreg.xtra), actreg.xtra, actreg),
cntynm=str_replace(cntynm, "Desoto", "DeSoto"),
cntyfips=ifelse(stpost=="AK", "000", cntyfips)) %>%
#misc fixes to data problems#
mutate(cntynm=ifelse(cntynm=="Dade" & stpost=="FL", "Miami-Dade", cntynm)) %>%
mutate(cntynm=ifelse(cntynm=="Dona Ana", "Do?a Ana", cntynm)) %>%
mutate(cntynm=ifelse(cntynm=="Lac Qui Parle", "Lac qui Parle", cntynm)) %>%
mutate(cntynm=ifelse(cntynm=="Dekalb" & stpost=="IN", "DeKalb", cntynm)) %>%
mutate(cntynm=ifelse(cntynm=="Dewitt" & stpost=="TX", "DeWitt", cntynm)) %>%
mutate(cntynm=ifelse(cntynm=="La Salle" & stpost=="LA", "LaSalle", cntynm)) %>%
mutate(cntynm=ifelse(geoid=="24510" & stpost=="MD", "Baltimore City", cntynm)) %>%
mutate(cntynm=ifelse(geoid=="29510" & stpost=="MO", "St. Louis City", cntynm)) %>%
mutate(cntynm=ifelse(geoid=="51515" & stpost=="VA", "Bedford City", cntynm)) %>%
dplyr::select(-reg.xtra, -actreg.xtra) %>%
left_join(dplyr::select(p, -cntynm), by=c("year","stpost","stfips","cntyfips","geoid"), suffix=c("_votes","")) %>%
#clean turnout and calculate turnout rates
mutate(turnout=ifelse(turnout==0, NA, turnout),
active=(is.na(reg)|reg==0) & !is.na(actreg) & actreg>0,
topoffice=(is.na(ballots)|ballots==0) & !is.na(turnout)) %>%
mutate(reg_all=ifelse(active, actreg, reg),
vote_all=ifelse(topoffice, turnout, ballots),
statenm=ifelse(cntynm=="Alaska", "Alaska", statenm)) %>%
mutate(reg_all=ifelse(active, actreg, reg)) %>%
mutate(turnout=ifelse(turnout==0, NA, turnout),
active=(is.na(reg)|reg==0) & !is.na(actreg) & actreg>0,
topoffice=(is.na(ballots)|ballots==0) & !is.na(turnout)) %>%
mutate(reg_all=ifelse(active, actreg, reg),
vote_all=ifelse(topoffice, turnout, ballots),
statenm=ifelse(cntynm=="Alaska", "Alaska", statenm)) %>%
#misc recodes
mutate(pcto_all=vote_all/reg_all,
pcto=turnout/reg_all,
pcto.elig=turnout/cvap,
yr1992=year==1992,
yr1996=year==1996,
yr2000=year==2000,
yr2004=year==2004,
yr2008=year==2008,
yr2012=year==2012,
yr2016=year==2016,
yr2020=year==2020,
cases_mn_centered=cases_mn - mean(cases_mn, na.rm=T),
deaths_mn_centered=deaths_mn - mean(deaths_mn, na.rm=T),
cases_sum_centered=cases_sum - mean(cases_sum, na.rm=T),
deaths_sum_centered=deaths_sum - mean(deaths_sum, na.rm=T),
avr=ifelse(statenm %in% c("Georgia","Oregon") & year<2016, 0, avr),
avr=ifelse(statenm %in% "Delaware" & year<=2008, 0, avr),
yrcount=as.integer(as.factor(year))-1) %>%
mutate(pcto=ifelse(is.infinite(pcto), NA, pcto),
pcto.elig=ifelse(is.infinite(pcto.elig), NA, pcto.elig)) %>%
mutate(pcto.elig=ifelse(pcto.elig>1, 1, pcto.elig))
#vermont votes where missing in 2018
vt_extra <- read_csv("vt_2018_votes.csv") %>%
mutate(stfips=str_pad(stfips, side="left", width=2, pad="0"),
cntyfips=str_pad(cntyfips, side="left", width=3, pad="0"),
geoid=str_pad(geoid, side="left", width=5, pad="0"))
switch_vbm_all <- left_join(switch_vbm_all,
rbind.fill(d, vt_extra) %>% dplyr::select(cntynm, stfips:year, statenm, vote_all)) %>%
mutate(pcvbm=vbm/vote_all,
first_yr_vbm_all=year + 4 - year%%4) %>%
dplyr::select(-vote_all, -year)
#create lag and lead variables for event studies#
d <- left_join(d, switch_vbm_all) %>%
left_join(switch_vbm_app) %>%
left_join(switch_vbm_noex) %>%
left_join(switch_vbm_perm) %>%
left_join(switch_avr) %>%
left_join(switch_edr) %>%
left_join(switch_early) %>%
mutate(pcvbm=ifelse(vbm_all==1, pcvbm, 0),
cycles_to_vbm_all=ifelse(is.na(first_yr_vbm_all), -1, (year-first_yr_vbm_all)/4),
cycles_to_vbm_app=ifelse(is.na(first_yr_vbm_app), -1, (year-first_yr_vbm_app)/4),
cycles_to_vbm_noex=ifelse(is.na(first_yr_vbm_noex), -1, (year-first_yr_vbm_noex)/4),
cycles_to_vbm_perm=ifelse(is.na(first_yr_vbm_perm), -1, (year-first_yr_vbm_perm)/4),
cycles_to_avr=ifelse(is.na(first_yr_avr), -1, (year-first_yr_avr)/4),
cycles_to_edr=ifelse(is.na(first_yr_edr), -1, (year-first_yr_edr)/4),
cycles_to_early=ifelse(is.na(first_yr_early), -1, (year-first_yr_early)/4)) %>%
mutate(vbm_perm=as.integer(cycles_to_vbm_perm>=0),
vbm_all_m3=cycles_to_vbm_all<=-3,
vbm_all_m2=cycles_to_vbm_all==-2,
vbm_all_m1=cycles_to_vbm_all==-1,
vbm_all_p0=cycles_to_vbm_all==0,
vbm_all_p1=cycles_to_vbm_all==1,
vbm_all_p2=cycles_to_vbm_all>=2,
vbm_noex_m3=cycles_to_vbm_noex<=-3,
vbm_noex_m2=cycles_to_vbm_noex==-2,
vbm_noex_m1=cycles_to_vbm_noex==-1,
vbm_noex_p0=cycles_to_vbm_noex==0,
vbm_noex_p1=cycles_to_vbm_noex==1,
vbm_noex_p2=cycles_to_vbm_noex>=2,
vbm_perm_m3=cycles_to_vbm_perm<=-3,
vbm_perm_m2=cycles_to_vbm_perm==-2,
vbm_perm_m1=cycles_to_vbm_perm==-1,
vbm_perm_p0=cycles_to_vbm_perm==0,
vbm_perm_p1=cycles_to_vbm_perm==1,
vbm_perm_p2=cycles_to_vbm_perm>=2,
vbm_app_m3=cycles_to_vbm_app<=-3,
vbm_app_m2=cycles_to_vbm_app==-2,
vbm_app_m1=cycles_to_vbm_app==-1,
vbm_app_p0=cycles_to_vbm_app>=0,
avr_m3=cycles_to_avr<=-3,
avr_m2=cycles_to_avr==-2,
avr_m1=cycles_to_avr==-1,
avr_p0=cycles_to_avr==0,
avr_p1=cycles_to_avr>=1,
edr_m3=cycles_to_edr<=-3,
edr_m2=cycles_to_edr==-2,
edr_m1=cycles_to_edr==-1,
edr_p0=cycles_to_edr==0,
edr_p1=cycles_to_edr==1,
edr_p2=cycles_to_edr>=2,
early_m3=cycles_to_early<=-3,
early_m2=cycles_to_early==-2,
early_m1=cycles_to_early==-1,
early_p0=cycles_to_early==0,
early_p1=cycles_to_early==1,
early_p2=cycles_to_early>=2) %>%
mutate(vbm_perm=ifelse(vbm_all==1, 1, vbm_perm)) %>%
filter(cntynm!="Overseas")
